* Title: 	asec_clean.do
* Version: 	23 May 2018
* Purpose: 	Clean data from IPUMS ASEC extract and create new variables


*******************************************************************************
* (0) Start of file
*******************************************************************************

* Close any log that is still open; -capture- swallows the error that
* -log close- raises when no log is open.
capture log close
* Open the log for this run, overwriting any existing log of the same name.
log using log/asec_clean, replace
* Do not pause output at -more- prompts.
set more off
* Start from a clean state: clear memory and drop all macros before loading data.
clear all
macro drop _all


*******************************************************************************
* (1) Load data
*******************************************************************************

* Load the raw ASEC extract, replacing anything currently in memory.
use dta/raw/asec_raw, clear

* Optional year restrictions. BOTH are currently commented out, so no year
* filter is applied here. The first would keep only 1965-1967 and 1977-2016
* (years for which we can observe state of residence); the second would keep
* 1965-2016.
*keep if inrange(year,1965,1967) | inrange(year,1977,2016)
*keep if inrange(year,1965,2016)

*******************************************************************************
* (2) Create data labels
*******************************************************************************

* Value labels attached to the derived variables created later in this file.
* Each mapping is listed one code per line, in ascending order of the code.

* Generic 0/1 dummy
label define yesno ///
	0 no ///
	1 yes

* Marital status (MaritalI)
label define maritallevels ///
	1 Never_Married ///
	2 Married ///
	3 Separated_Divorced ///
	4 Widowed

* Race / ethnicity group (RaceI)
label define racegroups ///
	1 "White" ///
	2 "African-American" ///
	3 "Hispanic" ///
	4 "Other"

* Family type (FamilyI)
label define familytypes ///
	0 Alone ///
	1 Spouse_Only ///
	2 Parent_Only ///
	3 Spouse_and_Parent ///
	4 Other

* Education level (EducationI)
label define edlevels ///
	1 Less_than_HS ///
	2 High_School ///
	3 Some_College ///
	4 College_4yr_Grad

* Industry group (IndustryI)
label define indgroups ///
	1 PrimaryInd ///
	2 Construction ///
	3 NondurableMan ///
	4 DurableMan ///
	5 Transport ///
	6 Wholesale ///
	7 Retail ///
	8 Finance ///
	9 Business ///
	10 Personal ///
	11 Professional ///
	12 Public

* Employment status (EmploymentI)
label define employ ///
	0 Employed ///
	1 STJobless ///
	2 LTJobless ///
	3 Jobless_Unknown

* Jobless status (JoblessStatusI)
label define joblessstatus ///
	0 "Employed" ///
	1 "Unemployed" ///
	2 "NILF, Want Job" ///
	3 "NILF, Don't Want Job" ///
	4 "NILF, Unknown"
			
			
*******************************************************************************
* (3) Create new variables (core/geography)
*******************************************************************************

* Household identifier: string of the form <year>_<serial>
egen UniqueHID = concat(year serial), punct("_")

notes UniqueHID: Household code combining serial number and year ///
				 \ asec_clean.do BA TS 
label variable UniqueHID "Unique code for household serial number"

* Attach the region indicator by state FIPS code. The assert() option makes
* the merge fail if state_region contains a state that never appears in the
* ASEC data; master-only observations are tolerated. No _merge variable is kept.
merge m:1 statefip using dta/state_region, assert(master matched) nogenerate


*******************************************************************************
* (4) Create new variables (demographics)
*******************************************************************************

* Prime-age (25-54 inclusive) dummies, one per sex code.
* inrange() returns 0 when age is missing, so missing ages are never flagged.

* sex == 1
generate PrimeMaleD = (inrange(age, 25, 54) & sex == 1)

label values PrimeMaleD yesno
label variable PrimeMaleD "Dummy for Prime Male"
notes PrimeMaleD: Dummy for prime male group \ asec_clean.do BA TS 

* sex == 2
generate PrimeFemaleD = (inrange(age, 25, 54) & sex == 2)

label values PrimeFemaleD yesno
label variable PrimeFemaleD "Dummy for Prime Female"
notes PrimeFemaleD: Dummy for prime female group \ asec_clean.do BA TS 

* Collapse marst into four marital-status groups:
*   marst 6      -> 1 (Never_Married)
*   marst 1 or 2 -> 2 (Married)
*   marst 3 or 4 -> 3 (Separated_Divorced)
*   marst 5      -> 4 (Widowed)
* Any other marst value (including missing) is left at 0, which has no entry
* in the maritallevels value label.
generate MaritalI = 0
replace MaritalI = 1 if marst == 6
replace MaritalI = 2 if inlist(marst, 1, 2)
replace MaritalI = 3 if inlist(marst, 3, 4)
replace MaritalI = 4 if marst == 5

label values MaritalI maritallevels
label variable MaritalI "Indicator for marital status"
notes MaritalI: Indicator variable for marital status \ asec_clean.do BA TS 

* Hispanic dummy: hispan strictly between 0 and 900. A missing hispan fails
* the "< 900" test and is therefore coded 0.
generate HispanicB = (hispan > 0 & hispan < 900)

label values HispanicB yesno
label variable HispanicB "Dummy for Hispanic status"
notes HispanicB: Dummy for Hispanic status \ asec_clean.do BA TS 

* Mutually exclusive race/ethnicity groups. Hispanic status takes precedence
* over race; among non-Hispanics, race 100 -> 1, race 200 -> 2, and everything
* else (including missing race) -> 4.
generate RaceI = cond(HispanicB == 1, 3, ///
				 cond(race == 100, 1, ///
				 cond(race == 200, 2, 4)))

label values RaceI racegroups
label variable RaceI "Indicator for race"
notes RaceI: Reported race \ asec_clean.do BA TS 

* Co-residence dummies built from the ages of linked household members.
* A member counts as present when the linked age is strictly between 0 and
* 150. Missing ages fail the "< 150" test (Stata treats missing as larger
* than any number), so they are coded 0.
* NOTE(review): presumably 0 and values >= 150 are the "no such person" codes
* in the extract - confirm against the codebook.

* Create dummy for living with spouse
generate SpouseB = (age_sp > 0 & age_sp < 150)

label var SpouseB "Dummy for living with spouse"
notes SpouseB: Dummy for living with spouse \ asec_clean.do BA TS 
label values SpouseB yesno

* Generate dummy for living with parent (any of the four linked parent ages)
generate ParentB = ((age_mom > 0 & age_mom < 150) ///
					| (age_mom2 > 0 & age_mom2 < 150) ///
					| (age_pop > 0 & age_pop < 150) ///
					| (age_pop2 > 0 & age_pop2 < 150))

* Label and note say "living WITH parent": the dummy measures co-residence,
* not whether a parent is alive.
label var ParentB "Dummy for living with parent"
notes ParentB: Dummy for living with parent \ asec_clean.do BA TS 
label values ParentB yesno

* Create dummy for living with spouse and parents
generate SpouseAndParentB = (SpouseB == 1) & (ParentB == 1)

label var SpouseAndParentB "Dummy for living with spouse and parent"
* Note cites asec_clean.do, the file that actually creates the variable.
notes SpouseAndParentB: Dummy for living with spouse and parent \ asec_clean.do BA TS 
label values SpouseAndParentB yesno

* Number of adults in household.
* Stata treats missing as larger than any number, so a bare "age >= 18" would
* flag persons with missing age as adults and inflate H_Size_Adults for their
* whole household. Missing ages are excluded explicitly.
generate adult = (age >= 18 & !mi(age))
bysort year serial: egen H_Size_Adults = total(adult)

* Create indicator for family type (adults only; non-adults stay missing)
*   0 = no spouse, no parent, only adult in the household
*   1 = spouse only
*   2 = parent only
*   3 = spouse and parent
*   4 = no spouse, no parent, but other adults in the household
generate FamilyI = .
replace  FamilyI = 0 if (adult == 1 & SpouseB == 0 & ParentB == 0 & H_Size_Adults == 1)
replace  FamilyI = 1 if (adult == 1 & SpouseB == 1 & ParentB == 0)
replace  FamilyI = 2 if (adult == 1 & SpouseB == 0 & ParentB == 1)
replace  FamilyI = 3 if (adult == 1 & SpouseB == 1 & ParentB == 1)
replace  FamilyI = 4 if (adult == 1 & SpouseB == 0 & ParentB == 0 & H_Size_Adults > 1)

* (Re)define the value label here so this section does not depend on an
* earlier definition; -replace- makes this safe if it already exists.
label define familytypes 0 Alone 1 Spouse_Only 2 Parent_Only 3 Spouse_and_Parent /// 
	  4 Other, replace

label var FamilyI "Indicator for family type (adults only)"
notes FamilyI: Indicator for family type \ asec_clean.do BA TS 
label values FamilyI familytypes

*******************************************************************************
* (5) Create new variables (education)
*******************************************************************************

* Education level from educ:
*   [2, 72)    -> 1 (Less_than_HS)
*   [72, 73]   -> 2 (High_School)
*   [80, 101)  -> 3 (Some_College)
*   [110, 126) -> 4 (College_4yr_Grad)
* Any educ value outside these ranges (including missing) stays at 0, which
* has no entry in the edlevels value label.
generate EducationI = 0
replace EducationI = 1 if educ >= 2 & educ < 72
replace EducationI = 2 if inrange(educ, 72, 73)
replace EducationI = 3 if educ >= 80 & educ < 101
replace EducationI = 4 if educ >= 110 & educ < 126

label values EducationI edlevels
label variable EducationI "Indicator for education"
notes EducationI: Indicator variable for education levels ///
				  \ asec_clean.do BA TS 

* In-school dummy: schlcoll equal to 1 or 3, or empstat equal to 33
generate InSchoolB = (inlist(schlcoll, 1, 3) | empstat == 33)

label values InSchoolB yesno
label variable InSchoolB "Dummy for in school"
notes InSchoolB: Dummy variable for full-time schooling ///
				  \ asec_clean.do BA TS 


*******************************************************************************
* (6) Create new variables (labor force)
*******************************************************************************

* Non-employment dummy:
*   1 when empstat is in [20, 40)
*   0 when empstat is 10 or 12
*   missing for every other empstat value
generate UnempB = 1 if empstat >= 20 & empstat < 40
replace UnempB = 0 if inlist(empstat, 10, 12)

label values UnempB yesno
label variable UnempB "Dummy for not employed"
notes UnempB: Dummy for not employed \ asec_clean.do BA TS 

* Employment dummy: the complement of UnempB, missing wherever UnempB is missing
generate EmpB = 1 - UnempB

label values EmpB yesno
label variable EmpB "Dummy for employed"
notes EmpB: Dummy for employed \ asec_clean.do BA TS 

* Create indicator for jobless status
*   0 = employed            (empstat 10 or 12, the same definition UnempB uses)
*   1 = unemployed          (empstat in [20, 23))
*   2 = NILF, wantjob == 2  (empstat in [30, 40))
*   3 = NILF, wantjob == 1
*   4 = NILF, wantjob neither 1 nor 2 (including missing)
* Observations whose empstat matches none of these (e.g. missing) are left
* missing rather than being labelled "Employed" by default.
generate JoblessStatusI = 0 if inlist(empstat, 10, 12)
replace JoblessStatusI = 1 if (empstat >= 20 & empstat < 23)
replace JoblessStatusI = 2 if ((empstat >= 30 & empstat < 40) & (wantjob == 2))
replace JoblessStatusI = 3 if ((empstat >= 30 & empstat < 40) & (wantjob == 1))
replace JoblessStatusI = 4 if ((empstat >= 30 & empstat < 40) & !inlist(wantjob, 1, 2))

label var JoblessStatusI "Indicator for jobless status"
notes JoblessStatusI: Indicator for jobless status \ asec_clean.do BA TS 
label values JoblessStatusI joblessstatus

* Create dummy for work in last 12 months (1-52 weeks worked).
* Left missing when wkswork1 is any missing value; mi() also catches the
* extended missing codes .a-.z, which a comparison with "." would not.
generate LastWorkB = (wkswork1 > 0 & wkswork1 <= 52) if !mi(wkswork1)

label var LastWorkB "Dummy for worked in past 12 months"
notes LastWorkB: Dummy for worked in past 12 months \ asec_clean.do BA TS 
label values LastWorkB yesno

* Create dummy for long-term jobless (not employed, no work in past 12 months).
* Coded 0 whenever either input is missing; the "unknown" case is separated
* out in EmploymentI below.
generate LTJoblessB = (LastWorkB == 0 & UnempB == 1)

label var  LTJoblessB "Dummy for >12 months jobless"
notes  LTJoblessB: Dummy for jobless and no work in past 12 months \ asec_clean.do BA TS 
label values  LTJoblessB yesno

* Create indicator for employment status
*   0 = employed (UnempB == 0)
*   1 = jobless, worked in the past 12 months
*   2 = jobless, no work in the past 12 months
*   3 = jobless, work history unknown (LastWorkB missing)
* Observations with missing UnempB stay missing instead of defaulting to
* "Employed". The code-3 line must run last so it overrides code 1.
gen EmploymentI = 0 if UnempB == 0
replace EmploymentI = 1 if (UnempB == 1 & LTJoblessB == 0)
replace EmploymentI = 2 if (UnempB == 1 & LTJoblessB == 1)
replace EmploymentI = 3 if (UnempB == 1 & mi(LastWorkB))

label var EmploymentI  "Employment Status"
notes EmploymentI: Employment status \ asec_clean.do BA TS 
label values EmploymentI employ

* Industry group from ind1990, assigned only to the employed (EmpB == 1).
* Everyone else keeps a missing IndustryI. All ranges are inclusive.
generate IndustryI = .
replace IndustryI = 1  if EmpB == 1 & inrange(ind1990,  10,  50)
replace IndustryI = 2  if EmpB == 1 & ind1990 == 60
replace IndustryI = 3  if EmpB == 1 & inrange(ind1990, 100, 229)
replace IndustryI = 4  if EmpB == 1 & inrange(ind1990, 230, 392)
replace IndustryI = 5  if EmpB == 1 & inrange(ind1990, 400, 472)
replace IndustryI = 6  if EmpB == 1 & inrange(ind1990, 500, 571)
replace IndustryI = 7  if EmpB == 1 & inrange(ind1990, 580, 691)
replace IndustryI = 8  if EmpB == 1 & inrange(ind1990, 700, 712)
replace IndustryI = 9  if EmpB == 1 & inrange(ind1990, 721, 760)
replace IndustryI = 10 if EmpB == 1 & inrange(ind1990, 761, 810)
replace IndustryI = 11 if EmpB == 1 & inrange(ind1990, 812, 893)
replace IndustryI = 12 if EmpB == 1 & inrange(ind1990, 900, 932)

* Stop the run if any employed person from 1977 onward ended up without a group
assert !mi(IndustryI) if EmpB==1 & year>=1977

label values IndustryI indgroups
label variable IndustryI "Indicator for industry"
notes IndustryI: Indicator variable for industry \ asec_clean.do BA TS 


*******************************************************************************
* (7) Create new variables (disability)
*******************************************************************************

* Three alternative disability dummies, each from a different source variable.

* (a) Based on reported income: incss or incssi strictly between 0 and 99999.
*     Note: the upper bound excludes the NIU value 99999. This must run before
*     the NIU codes are recoded to missing in the income section.
generate DisabilityB = (incss > 0 & incss < 99999) | (incssi > 0 & incssi < 99999)

label values DisabilityB yesno
label variable DisabilityB "SSI SSDI disability dummy"
notes DisabilityB: Dummy for disability based on reported income ///
				   \ asec_clean.do BA TS 

* (b) Based on NILF status: empstat equal to 32
generate Disability2B = empstat == 32

label values Disability2B yesno
label variable Disability2B "Employment status disability dummy"
notes Disability2B: Dummy for disability based on NILF status ///
	                \ asec_clean.do BA TS 

* (c) Based on reported disability: disabwrk equal to 2
generate Disability3B = disabwrk == 2

label values Disability3B yesno
label variable Disability3B "Self reported disability dummy"
notes Disability3B: Dummy for disability based on self reports ///
	                \ asec_clean.do BA TS 


*******************************************************************************
* (8) Create new variables (income)
*******************************************************************************

* Recode NIU / missing sentinel codes to Stata missing.
* Notes: (1) NIU codes vary depending on variable in IPUMS ASEC
*        (2) Top codes vary substantially over time, and have both increased and decreased
*        (3) Some codes use 99999 for NIU, and 99997 for top code. Others have
*            99999 as a standard value or top-code. In some cases 9s ending in 8
*            are used for missing value
*        (4) NIU and Missing are eliminated, top codes are not further adjusted for analysis
* NOTE(review): the sentinel widths below are hard-coded; confirm they match
* the codebook of the extract actually being loaded.

* Eight-digit sentinels
replace inctot = . if inlist(inctot, 99999999, 99999998)

* Seven-digit sentinels
foreach v of varlist incbus incfarm incwage {
	replace `v' = . if inlist(`v', 9999999, 9999998)
}

* Six-digit sentinel
foreach v of varlist ftotval incdisab incdivid incretir incsurv {
	replace `v' = . if `v' == 999999
}

* Five-digit sentinel
foreach v of varlist incalim incasist incchild inceduc incint incother ///
					 incss incssi incrent incunemp incvet incwkcom incwelfr {
	replace `v' = . if `v' == 99999
}

* Alimony income variable is missing for 2015-2016 ASEC - recoding to 0 so
* that the Inc_Other sum below is not missing for those years.
replace incalim = 0 if year >= 2015

* Attach the CPI factor by year; observations found only in cpi_factor are
* discarded and no _merge variable is kept.
merge m:1 year using dta/cpi_factor, keep(master matched) nogenerate

* Real income variables: nominal amount times CPI_Factor.
* A sum is missing whenever any one of its components is missing.
generate Inc_FamilyTotal      = ftotval  * CPI_Factor
generate Inc_PersonalTotal    = inctot   * CPI_Factor
generate Inc_Wage             = incwage  * CPI_Factor
generate Inc_Investment       = (incbus + incfarm + incint + incdivid + incrent) * CPI_Factor
generate Inc_Retirement       = incretir * CPI_Factor
generate Inc_WorkersComp      = incwkcom * CPI_Factor
generate Inc_FamilyTransfer   = incasist * CPI_Factor
generate Inc_GovernmentTotal  = (incunemp + incss + incssi + incdisab + incvet + incwelfr + inceduc) * CPI_Factor
generate Inc_Unemp            = incunemp * CPI_Factor
generate Inc_Disability       = (incss + incssi + incdisab) * CPI_Factor
generate Inc_Vet              = incvet   * CPI_Factor
generate Inc_GovernmentOther  = (incwelfr + inceduc) * CPI_Factor
generate Inc_Other            = (incsurv + incchild + incalim + incother) * CPI_Factor


*******************************************************************************
* (9) Restrict sample
*******************************************************************************

* Sample restrictions, applied one at a time so the log shows how many
* observations each step removes.

* Under 16, or age missing
drop if age < 16 | mi(age)

* Armed forces (empstat code 1)
drop if empstat == 1

* 3/8 file from the 2014 ASEC (hflag code 1)
drop if hflag == 1

* Negative weights (occur in some years in the 1960s)
drop if wtsupp < 0

*******************************************************************************
* (10) End of file
*******************************************************************************

* Keep the core identifiers plus every variable stored between UniqueHID and
* Inc_Other. "UniqueHID-Inc_Other" is a POSITIONAL range: it depends on the
* order in which variables were added to the dataset. UniqueHID is the first
* variable this file creates and Inc_Other the last, so the range covers all
* derived variables (including the helper variable adult).
* NOTE(review): presumably the variables brought in by the two merges
* (state_region, cpi_factor) also fall inside this range - confirm.
* Reordering the generate statements above, or creating variables after
* Inc_Other, changes what is kept.
keep year wtsupp statefip sex age UniqueHID-Inc_Other
* Shrink each variable to the smallest storage type that holds its values.
compress

* Dataset-level label and note, then a data signature stored with the data.
label data "Create ASEC data \ 05-23-2018"
notes: asec_clean \ Clean data created from asec_raw ///
	   \ asec_clean.do \ BA TS 
datasignature set
* Write the cleaned file, overwriting any previous version.
save dta/asec_clean, replace

* Close the log opened at the top of the file and end the do-file.
log close
exit, clear
